astra <- read.csv("https://raw.githubusercontent.com/QMSS-G5063-2023/course_content/main/Exercises/03_astronauts%20-%20GRADED/astronauts.csv?token=GHSAT0AAAAAAB557SY67DYJGWWUEAM4MRUIY7M3XOA")
head(astra)
##   id number nationwide_number                name
## 1  1      1                 1       Gagarin, Yuri
## 2  2      2                 2      Titov, Gherman
## 3  3      3                 1 Glenn, John H., Jr.
## 4  4      3                 1 Glenn, John H., Jr.
## 5  5      4                 2 Carpenter, M. Scott
## 6  6      5                 2 Nikolayev, Andriyan
##                                            original_name  sex year_of_birth
## 1           ГÐ\220ГÐ\220РÐ\230Ð\235 ЮÑ\200ий Ð\220лекÑ\201еевич male          1934
## 2           ТÐ\230ТОВ ГеÑ\200ман Степанович male          1935
## 3                                    Glenn, John H., Jr. male          1921
## 4                                    Glenn, John H., Jr. male          1921
## 5                                    Carpenter, M. Scott male          1925
## 6 Ð\235Ð\230КОЛÐ\220ЕВ Ð\220ндÑ\200иÑ\217н ГÑ\200игоÑ\200ьевич male          1929
##      nationality military_civilian              selection year_of_selection
## 1 U.S.S.R/Russia          military                 TsPK-1              1960
## 2 U.S.S.R/Russia          military                 TsPK-1              1960
## 3           U.S.          military NASA Astronaut Group 1              1959
## 4           U.S.          military NASA Astronaut Group 2              1959
## 5           U.S.          military                NASA- 1              1959
## 6 U.S.S.R/Russia          military                 TsPK-1              1960
##   mission_number total_number_of_missions occupation year_of_mission
## 1              1                        1      pilot            1961
## 2              1                        1      pilot            1961
## 3              1                        2      pilot            1962
## 4              2                        2        PSP            1998
## 5              1                        1      Pilot            1962
## 6              1                        2      pilot            1962
##     mission_title  ascend_shuttle        in_orbit descend_shuttle hours_mission
## 1        Vostok 1        Vostok 1        Vostok 2        Vostok 3          1.77
## 2        Vostok 2        Vostok 2        Vostok 2        Vostok 2         25.00
## 3            MA-6            MA-6            MA-6            MA-6          5.00
## 4          STS-95          STS-95          STS-95          STS-95        213.00
## 5 Mercury-Atlas 7 Mercury-Atlas 7 Mercury-Atlas 7 Mercury-Atlas 7          5.00
## 6        Vostok 3        Vostok 3        Vostok 3        Vostok 3         94.00
##   total_hrs_sum eva_instances eva_hrs_mission total_eva_hrs
## 1          1.77             0               0             0
## 2         25.30             0               0             0
## 3        218.00             0               0             0
## 4        218.00             0               0             0
## 5          5.00             0               0             0
## 6        519.33             0               0             0
  1. Age & Sex
astra$age_of_selection <- astra$year_of_selection - astra$year_of_birth
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.3
# Scatter plot
ggplot(astra, aes(x = year_of_birth, y = age_of_selection, color=sex)) +
  geom_point(alpha=0.5) +
  labs(x="Year of Birth", y="Age of Selection", color="sex") +
    theme(legend.position="right") +
  ggtitle("Age of selection by years of birth: gender difference")

# Few female pilots
# Female astraunauts began to be selected from 1940's birth cohort
# Age range differ by gender
# Box plot (show age distribution for each birth cohort, by gender)
astra <- within(astra, {
  birth_cohort <- NA
  birth_cohort[year_of_birth<1930] <- "Before 1920's"
  birth_cohort[year_of_birth>=1930 & year_of_birth <1940] <- "1930's"
  birth_cohort[year_of_birth>=1940 & year_of_birth < 1950] <- "1940's"
  birth_cohort[year_of_birth>=1950 & year_of_birth < 1960] <- "1950's"
  birth_cohort[year_of_birth>=1960 & year_of_birth < 1970] <- "1960's"
  birth_cohort[year_of_birth>=1970 ] <- "1970's and after"
})
ggplot(astra, aes(x=reorder(birth_cohort, year_of_birth, na.ra=TRUE), y=age_of_selection)) +
  geom_boxplot(aes(fill=sex), outlier.colour = "transparent",
               alpha = 0.3) + #<< fill in different colors by "sex" 
  coord_flip() + labs(x="Year of Birth", y="Age of Selection") +
    theme(legend.position="right") +
    ggtitle("Age of selection by birth cohorts: gender difference")

# Few female pilots
# Female astraunauts began to be selected not until 1940's birth cohort
# Age range differ by gender
ggplot(astra, aes(x=reorder(birth_cohort, year_of_birth, na.ra=TRUE), y=age_of_selection)) +
  geom_jitter(shape=21, aes(fill=sex), color="transparent",
              position = position_jitter(w=0.1)) +
  coord_flip() + labs(x="", y="age of selection") +
    theme(legend.position="right") +
    ggtitle("Age of selection by years of birth: gender difference")

# Few female pilots
# Female astraunauts began to be selected from 1940's birth cohort
# Age range differ by gender
  1. Nationality
require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
astra <- astra %>%
  group_by(nationality) %>%
  mutate(year_of_first_mission = min(year_of_mission))

astra <- astra %>%
  group_by(mission_title) %>%
  mutate(joint_mission = as.integer(n_distinct(nationality) > 1))

summarize(astra, id, nationality, year_of_first_mission, mission_title, joint_mission, eva_instances, eva_hrs_mission, total_eva_hrs)
## `summarise()` has grouped output by 'mission_title'. You can override using the
## `.groups` argument.
## # A tibble: 1,277 x 8
## # Groups:   mission_title [362]
##    mission_title    id nationality  year_of_first_m~ joint_mission eva_instances
##    <chr>         <int> <chr>                   <int>         <int>         <int>
##  1 1               205 U.S.S.R/Rus~             1961             1             2
##  2 1               284 U.S.S.R/Rus~             1961             1             2
##  3 1               434 U.S.S.R/Rus~             1961             1             0
##  4 1               438 Syria                    1987             1             0
##  5 1               455 U.S.S.R/Rus~             1961             1             0
##  6 1               462 U.S.                     1962             1             0
##  7 1               815 U.S.S.R/Rus~             1961             1             0
##  8 10              386 U.S.S.R/Rus~             1961             1             1
##  9 10              587 U.S.S.R/Rus~             1961             1             0
## 10 10              758 U.S.                     1962             1             2
## # ... with 1,267 more rows, and 2 more variables: eva_hrs_mission <dbl>,
## #   total_eva_hrs <dbl>
ggplot(astra, aes(x = reorder(nationality, -year_of_first_mission), y = year_of_mission, color=joint_mission)) +
  geom_point(alpha=0.5) +
  coord_flip() +
  labs(x="", y="Year of missions", color="joint mission") +
    theme(legend.position="right") +
    ggtitle("World-wide history of space missions: astronauts across countries")

  1. Space walks
library(dplyr)
agg <- astra %>%
  group_by(name) %>%
  summarize('num_of_eva'=sum(eva_instances),
            'total_eva'=sum(total_eva_hrs))
summarize(agg, name, num_of_eva, total_eva)
## # A tibble: 564 x 3
##    name                             num_of_eva total_eva
##    <chr>                                 <int>     <dbl>
##  1 Acaba, Joseph M.                          3      59.8
##  2 Acton, Loren Wilbur                       0       0  
##  3 Adamson, James C.                         0       0  
##  4 Afanasyev, Viktor Mikhaylovich            7     152. 
##  5 Aidyn (Aydyn) Akanovich Aimbetov          0       0  
##  6 Akers, Thomas D.                          4     121. 
##  7 Akiyama, Toyohiro                         0       0  
##  8 Aksyonov, Vladimir                        0       0  
##  9 Al-saud, Sultan bin Salman                0       0  
## 10 Al Mansoori, Hazzaa                       0       0  
## # ... with 554 more rows
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.1.3
dplot <- ggplot(agg, aes(x=num_of_eva, y=total_eva)) +
        geom_bar(stat="summary", fill="steelblue")+
  labs(x="The number of EVA", y="Total hours of EVA")+
    ggtitle("The relationship between the number of EVA and total hours of EVA")
dplot
## No summary function supplied, defaulting to `mean_se()`

  1. Choose my own: trends in hours of space missions over time (1960-2020)
# Whether the mission lasted over 30 days
astra$success <- as.numeric(astra$hours_mission > 720)
head(astra)
## # A tibble: 6 x 29
## # Groups:   mission_title [6]
##      id number nationwide_number name          original_name sex   year_of_birth
##   <int>  <int>             <int> <chr>         <chr>         <chr>         <int>
## 1     1      1                 1 Gagarin, Yuri "ГÐ\u0090Г~ male           1934
## 2     2      2                 2 Titov, Gherm~ "ТИТОВ ~ male           1935
## 3     3      3                 1 Glenn, John ~ "Glenn, John~ male           1921
## 4     4      3                 1 Glenn, John ~ "Glenn, John~ male           1921
## 5     5      4                 2 Carpenter, M~ "Carpenter, ~ male           1925
## 6     6      5                 2 Nikolayev, A~ "Ð\u009dИК~ male           1929
## # ... with 22 more variables: nationality <chr>, military_civilian <chr>,
## #   selection <chr>, year_of_selection <int>, mission_number <int>,
## #   total_number_of_missions <int>, occupation <chr>, year_of_mission <int>,
## #   mission_title <chr>, ascend_shuttle <chr>, in_orbit <chr>,
## #   descend_shuttle <chr>, hours_mission <dbl>, total_hrs_sum <dbl>,
## #   eva_instances <int>, eva_hrs_mission <dbl>, total_eva_hrs <dbl>,
## #   age_of_selection <int>, birth_cohort <chr>, ...
astra$yearbin <- cut(astra$year_of_mission,
                     breaks = c(1910, 1920, 1930, 1940, 1950, 1960, 1970, 1980, 1990, 2000, 2010, 2020),
                     labels = c('1910s', '1920s', '1930s', '1940s', '1950s', '1960s', '1970s', '1980s', '1990s', '2000s', '2010s'))

bargraph <- ggplot(astra, aes(x=yearbin, y=success, color = sex))+
  geom_bar(stat = "summary", fun = "mean", fill = "grey", position="dodge") +
  labs(x="", y="% of mission lasted over 30 days")
bargraph

missionhrs <- astra %>%
  group_by(year_of_mission, sex) %>%
  summarize('avghours_mission'=mean(hours_mission),
            'minhours_mission'=min(hours_mission), 
            'maxhours_mission'=max(hours_mission), na.ra=T)
## `summarise()` has grouped output by 'year_of_mission'. You can override using
## the `.groups` argument.
dotlineplot <- ggplot(missionhrs, aes(x=year_of_mission, y=avghours_mission, color = sex))+
  geom_line() + geom_point(size=3) +
  labs(x="", y="Average hours of space missions") +
    ggtitle("Average hours of space missions over time (1960-2020)")
dotlineplot

* The bar graph charts how the percentage of missions whose duration lasted over 30 days(700 hours) changed over every decades, by sex of the astranauts who participated in the missions. The dotted line graph also shows the annual trend of average hours of space missions by sex. Both graphs reveals the interesting pattern that 1) the duration of missions increased over time, particularly in the last decade, and 2) the gender difference in the duration of mission disappeared, and even got reversed in 2010’s. I recommend the dotted line chart as it represents the overall time trends and changes in gender difference more intuitively.

5. Interactivity: make two plots interactive

library(plotly)
## Warning: package 'plotly' was built under R version 4.1.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
dotlineplot2 <- ggplot(missionhrs, aes(x=year_of_mission, y=avghours_mission, color = sex, label = minhours_mission))+
  geom_line() + geom_point(size=3) +
  labs(x="", y="Average hours of space missions") +
  ggtitle("Trend in average hourse of space mission")
ggplotly(dotlineplot2,
         tooltip = c("year_of_mission", "avghours_mission", "minhours_mission"))
boxplot <- plot_ly(astra, y=~age_of_selection, x =~reorder(birth_cohort, year_of_birth), color = ~sex, type = "box")
m <- list(l = 100, r = 100, b = 100, t = 10, pad = 10) #<<
boxplot <- boxplot %>% layout(autosize = F, width = 900, height = 400,
                              boxmode="group", 
                              margin = m,
                              yaxis = list(title = "Age of Selection", showticklabels =T),
                              xaxis = list(title = "Birth cohort"),
                              title = 'Age of selection by birth cohorts'
                              )  
## Warning: Specifying width/height in layout() is now deprecated.
## Please specify in ggplotly() or plot_ly()
boxplot
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning: 'layout' objects don't have these attributes: 'boxmode'
## Valid attributes include:
## '_deprecated', 'activeshape', 'annotations', 'autosize', 'autotypenumbers', 'calendar', 'clickmode', 'coloraxis', 'colorscale', 'colorway', 'computed', 'datarevision', 'dragmode', 'editrevision', 'editType', 'font', 'geo', 'grid', 'height', 'hidesources', 'hoverdistance', 'hoverlabel', 'hovermode', 'images', 'legend', 'mapbox', 'margin', 'meta', 'metasrc', 'modebar', 'newshape', 'paper_bgcolor', 'plot_bgcolor', 'polar', 'scene', 'selectdirection', 'selectionrevision', 'separators', 'shapes', 'showlegend', 'sliders', 'smith', 'spikedistance', 'template', 'ternary', 'title', 'transition', 'uirevision', 'uniformtext', 'updatemenus', 'width', 'xaxis', 'yaxis', 'barmode', 'bargap', 'mapType'

6. Data Table

astronaut <- astra %>%
  group_by(name) %>%
  summarize('total_number_of_missions'=max(total_number_of_missions),
            'total_hours_mission'=max(total_hrs_sum),
            'total_eva_hrs'=max(total_eva_hrs))

library("DT")
## Warning: package 'DT' was built under R version 4.1.3
# Fixing Column Labels

library(stringr)
pretty_headers <- 
  gsub("[.]", " ", colnames(astronaut)) %>%
  str_to_title()

# Adding Selection Fields and Sliders
astronaut %>%
  datatable(
    rownames = FALSE,
    colnames = pretty_headers,
    filter = list(position = "top"),
    options = list(
      dom = "Bfrtip",
      buttons = I("colvis"),
      language = list(sSearch = "Filter:")
    ),
    extensions = c("Buttons", "Responsive")
  )